{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# COMPSCI 389: Introduction to Machine Learning\n", "# Topic 4.0 Model Evaluation\n", "\n", "In this notebook we will consider ways of evaluating how effective supervised learning algorithms are.\n", "\n", "Let's start with the imports that we will use in this notebook:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from sklearn.neighbors import KDTree\n", "from sklearn.base import BaseEstimator\n", "import numpy as np\n", "\n", "# New this time:\n", "from sklearn.model_selection import train_test_split # For splitting into training and testing sets (more on this below!)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next, let's load and display the GPA data set:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | physics | \n", "biology | \n", "history | \n", "English | \n", "geography | \n", "literature | \n", "Portuguese | \n", "math | \n", "chemistry | \n", "gpa | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "622.60 | \n", "491.56 | \n", "439.93 | \n", "707.64 | \n", "663.65 | \n", "557.09 | \n", "711.37 | \n", "731.31 | \n", "509.80 | \n", "1.33333 | \n", "
1 | \n", "538.00 | \n", "490.58 | \n", "406.59 | \n", "529.05 | \n", "532.28 | \n", "447.23 | \n", "527.58 | \n", "379.14 | \n", "488.64 | \n", "2.98333 | \n", "
2 | \n", "455.18 | \n", "440.00 | \n", "570.86 | \n", "417.54 | \n", "453.53 | \n", "425.87 | \n", "475.63 | \n", "476.11 | \n", "407.15 | \n", "1.97333 | \n", "
3 | \n", "756.91 | \n", "679.62 | \n", "531.28 | \n", "583.63 | \n", "534.42 | \n", "521.40 | \n", "592.41 | \n", "783.76 | \n", "588.26 | \n", "2.53333 | \n", "
4 | \n", "584.54 | \n", "649.84 | \n", "637.43 | \n", "609.06 | \n", "670.46 | \n", "515.38 | \n", "572.52 | \n", "581.25 | \n", "529.04 | \n", "1.58667 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
43298 | \n", "519.55 | \n", "622.20 | \n", "660.90 | \n", "543.48 | \n", "643.05 | \n", "579.90 | \n", "584.80 | \n", "581.25 | \n", "573.92 | \n", "2.76333 | \n", "
43299 | \n", "816.39 | \n", "851.95 | \n", "732.39 | \n", "621.63 | \n", "810.68 | \n", "666.79 | \n", "705.22 | \n", "781.01 | \n", "831.76 | \n", "3.81667 | \n", "
43300 | \n", "798.75 | \n", "817.58 | \n", "731.98 | \n", "648.42 | \n", "751.30 | \n", "648.67 | \n", "662.05 | \n", "773.15 | \n", "835.25 | \n", "3.75000 | \n", "
43301 | \n", "527.66 | \n", "443.82 | \n", "545.88 | \n", "624.18 | \n", "420.25 | \n", "676.80 | \n", "583.41 | \n", "395.46 | \n", "509.80 | \n", "2.50000 | \n", "
43302 | \n", "512.56 | \n", "415.41 | \n", "517.36 | \n", "532.37 | \n", "592.30 | \n", "382.20 | \n", "538.35 | \n", "448.02 | \n", "496.39 | \n", "3.16667 | \n", "
43303 rows × 10 columns
\n", "\n", " | physics | \n", "biology | \n", "history | \n", "English | \n", "geography | \n", "literature | \n", "Portuguese | \n", "math | \n", "chemistry | \n", "
---|---|---|---|---|---|---|---|---|---|
28091 | \n", "424.58 | \n", "409.10 | \n", "630.61 | \n", "632.05 | \n", "535.21 | \n", "597.34 | \n", "599.84 | \n", "511.14 | \n", "435.55 | \n", "
5059 | \n", "646.34 | \n", "730.26 | \n", "625.33 | \n", "498.99 | \n", "582.52 | \n", "628.91 | \n", "559.91 | \n", "611.36 | \n", "771.44 | \n", "
37171 | \n", "505.06 | \n", "585.74 | \n", "573.60 | \n", "542.00 | \n", "566.37 | \n", "622.29 | \n", "375.83 | \n", "665.16 | \n", "638.87 | \n", "
37197 | \n", "399.97 | \n", "490.58 | \n", "461.35 | \n", "439.84 | \n", "479.78 | \n", "404.52 | \n", "422.41 | \n", "417.93 | \n", "488.64 | \n", "
30458 | \n", "676.03 | \n", "743.46 | \n", "653.00 | \n", "595.96 | \n", "584.78 | \n", "682.12 | \n", "577.30 | \n", "573.07 | \n", "651.62 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
7950 | \n", "480.19 | \n", "467.69 | \n", "519.39 | \n", "415.53 | \n", "555.47 | \n", "413.44 | \n", "409.56 | \n", "491.42 | \n", "462.23 | \n", "
33895 | \n", "576.49 | \n", "526.81 | \n", "561.79 | \n", "733.88 | \n", "627.47 | \n", "646.09 | \n", "649.64 | \n", "653.90 | \n", "613.73 | \n", "
1334 | \n", "389.58 | \n", "401.12 | \n", "308.93 | \n", "434.19 | \n", "451.13 | \n", "386.36 | \n", "484.20 | \n", "535.33 | \n", "394.41 | \n", "
16687 | \n", "404.59 | \n", "467.41 | \n", "647.27 | \n", "573.02 | \n", "573.54 | \n", "544.47 | \n", "617.50 | \n", "422.61 | \n", "473.46 | \n", "
39688 | \n", "465.34 | \n", "345.31 | \n", "451.07 | \n", "373.54 | \n", "497.07 | \n", "507.35 | \n", "390.95 | \n", "374.44 | \n", "484.54 | \n", "
25981 rows × 9 columns
\n", "